#!/usr/bin/env python
#coding=utf-8
'''
Created on Jul 30, 2010

@author: lxd
'''
from common.web import Web
import re

class TsinaWeb(Web):
    """Web client bound to the t.sina.com.cn host."""

    def __init__(self, logfile='', interval_time=180):
        """Record the target host, then run the base ``Web`` initializer.

        ``logfile`` and ``interval_time`` are forwarded unchanged to
        ``Web.__init__``; their meaning is defined by that base class.
        """
        # host is assigned before the base initializer runs, in case
        # Web.__init__ reads it -- TODO confirm against common.web.
        self.host = 't.sina.com.cn'
        Web.__init__(self, logfile=logfile, interval_time=interval_time)

    def getMain(self):
        """Fetch the site front page with an empty referer.

        Returns whatever ``self.fetchData(referer, url)`` returns
        (fetchData is not defined in this class; presumably inherited
        from ``Web``).
        """
        return self.fetchData('', 'http://t.sina.com.cn/')

def parseTsina(f):
    """Extract tweet texts from the HTML of the t.sina.com.cn front page.

    Args:
        f: file-like object whose ``read()`` returns the page HTML.

    Returns:
        A list holding the text captured by the ``content`` group for every
        tweet item found, in document order; an empty list when nothing
        matches.
    """
    data = f.read()
    # Strip named HTML entities such as &lt; before matching.
    data = re.sub(r'&[a-zA-Z]{1,10};', '', data)

    # Attribute values are matched with [^"]+ and the remaining wildcards are
    # lazy, so a match cannot run past its own item: with greedy ".+" several
    # items on one line collapsed into a single result.
    reg = r'<div class="twit_item_content"> <a href="http://t\.sina\.com\.cn/[^"]+" uid="\d+" target="_blank" title="[^"]+">.+?</a>：(?P<content>[^<>].*?)<div class="twit_item_time">.+?</div></div>'
    p = re.compile(reg, re.IGNORECASE)
    # The pattern has exactly one group, so findall yields its text directly.
    return p.findall(data)
    
if __name__ == '__main__':
    # Fetch the front page and print the tweets parsed out of it.
    tsinaWeb = TsinaWeb()
    f = tsinaWeb.getMain()
    tsina = parseTsina(f)
    # A parenthesized single-argument print produces the same output under
    # Python 2 and is also valid syntax under Python 3.
    if tsina:
        print(tsina)
    else:
        print('no twitter')
